{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "K-6dTAbX1Ngw",
    "outputId": "a255c986-9791-418c-9d46-5dd850953eaf"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "\u001b[?25l     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/82.5 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.5/82.5 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m866.9/866.9 kB\u001b[0m \u001b[31m27.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.7/7.7 MB\u001b[0m \u001b[31m112.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m431.5/431.5 kB\u001b[0m \u001b[31m44.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.7/138.7 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.6/519.6 kB\u001b[0m \u001b[31m45.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.9/1.9 MB\u001b[0m \u001b[31m105.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m276.3/276.3 kB\u001b[0m \u001b[31m32.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m103.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m97.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m90.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.0/77.0 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.4/143.4 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.0/73.0 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.3/66.3 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m693.9/693.9 kB\u001b[0m \u001b[31m49.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.7/45.7 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.9/69.9 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m805.2/805.2 kB\u001b[0m \u001b[31m55.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.5/59.5 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m727.7/727.7 kB\u001b[0m \u001b[31m56.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.9/112.9 kB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m295.0/295.0 kB\u001b[0m \u001b[31m34.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m105.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m81.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m25.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.9/172.9 kB\u001b[0m \u001b[31m21.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m114.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.4/58.4 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m30.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.7/526.7 kB\u001b[0m \u001b[31m46.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25h"
     ]
    }
   ],
   "source": [
    "!pip install -qU nougat-ocr llama-index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 74
    },
    "id": "p2-UKt661VdN",
    "outputId": "0f285e62-9bdc-4ec7-85f7-4d6d92895c90"
   },
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ],
      "text/html": [
       "\n",
       "     <input type=\"file\" id=\"files-62158d4b-5d42-42c6-9cb3-d9bb63c6ecc4\" name=\"files[]\" multiple disabled\n",
       "        style=\"border:none\" />\n",
       "     <output id=\"result-62158d4b-5d42-42c6-9cb3-d9bb63c6ecc4\">\n",
       "      Upload widget is only available when the cell has been executed in the\n",
       "      current browser session. Please rerun this cell to enable.\n",
       "      </output>\n",
       "      <script>// Copyright 2017 Google LLC\n",
       "//\n",
       "// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
       "// you may not use this file except in compliance with the License.\n",
       "// You may obtain a copy of the License at\n",
       "//\n",
       "//      http://www.apache.org/licenses/LICENSE-2.0\n",
       "//\n",
       "// Unless required by applicable law or agreed to in writing, software\n",
       "// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
       "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
       "// See the License for the specific language governing permissions and\n",
       "// limitations under the License.\n",
       "\n",
       "/**\n",
       " * @fileoverview Helpers for google.colab Python module.\n",
       " */\n",
       "(function(scope) {\n",
       "function span(text, styleAttributes = {}) {\n",
       "  const element = document.createElement('span');\n",
       "  element.textContent = text;\n",
       "  for (const key of Object.keys(styleAttributes)) {\n",
       "    element.style[key] = styleAttributes[key];\n",
       "  }\n",
       "  return element;\n",
       "}\n",
       "\n",
       "// Max number of bytes which will be uploaded at a time.\n",
       "const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
       "\n",
       "function _uploadFiles(inputId, outputId) {\n",
       "  const steps = uploadFilesStep(inputId, outputId);\n",
       "  const outputElement = document.getElementById(outputId);\n",
       "  // Cache steps on the outputElement to make it available for the next call\n",
       "  // to uploadFilesContinue from Python.\n",
       "  outputElement.steps = steps;\n",
       "\n",
       "  return _uploadFilesContinue(outputId);\n",
       "}\n",
       "\n",
       "// This is roughly an async generator (not supported in the browser yet),\n",
       "// where there are multiple asynchronous steps and the Python side is going\n",
       "// to poll for completion of each step.\n",
       "// This uses a Promise to block the python side on completion of each step,\n",
       "// then passes the result of the previous step as the input to the next step.\n",
       "function _uploadFilesContinue(outputId) {\n",
       "  const outputElement = document.getElementById(outputId);\n",
       "  const steps = outputElement.steps;\n",
       "\n",
       "  const next = steps.next(outputElement.lastPromiseValue);\n",
       "  return Promise.resolve(next.value.promise).then((value) => {\n",
       "    // Cache the last promise value to make it available to the next\n",
       "    // step of the generator.\n",
       "    outputElement.lastPromiseValue = value;\n",
       "    return next.value.response;\n",
       "  });\n",
       "}\n",
       "\n",
       "/**\n",
       " * Generator function which is called between each async step of the upload\n",
       " * process.\n",
       " * @param {string} inputId Element ID of the input file picker element.\n",
       " * @param {string} outputId Element ID of the output display.\n",
       " * @return {!Iterable<!Object>} Iterable of next steps.\n",
       " */\n",
       "function* uploadFilesStep(inputId, outputId) {\n",
       "  const inputElement = document.getElementById(inputId);\n",
       "  inputElement.disabled = false;\n",
       "\n",
       "  const outputElement = document.getElementById(outputId);\n",
       "  outputElement.innerHTML = '';\n",
       "\n",
       "  const pickedPromise = new Promise((resolve) => {\n",
       "    inputElement.addEventListener('change', (e) => {\n",
       "      resolve(e.target.files);\n",
       "    });\n",
       "  });\n",
       "\n",
       "  const cancel = document.createElement('button');\n",
       "  inputElement.parentElement.appendChild(cancel);\n",
       "  cancel.textContent = 'Cancel upload';\n",
       "  const cancelPromise = new Promise((resolve) => {\n",
       "    cancel.onclick = () => {\n",
       "      resolve(null);\n",
       "    };\n",
       "  });\n",
       "\n",
       "  // Wait for the user to pick the files.\n",
       "  const files = yield {\n",
       "    promise: Promise.race([pickedPromise, cancelPromise]),\n",
       "    response: {\n",
       "      action: 'starting',\n",
       "    }\n",
       "  };\n",
       "\n",
       "  cancel.remove();\n",
       "\n",
       "  // Disable the input element since further picks are not allowed.\n",
       "  inputElement.disabled = true;\n",
       "\n",
       "  if (!files) {\n",
       "    return {\n",
       "      response: {\n",
       "        action: 'complete',\n",
       "      }\n",
       "    };\n",
       "  }\n",
       "\n",
       "  for (const file of files) {\n",
       "    const li = document.createElement('li');\n",
       "    li.append(span(file.name, {fontWeight: 'bold'}));\n",
       "    li.append(span(\n",
       "        `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
       "        `last modified: ${\n",
       "            file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
       "                                    'n/a'} - `));\n",
       "    const percent = span('0% done');\n",
       "    li.appendChild(percent);\n",
       "\n",
       "    outputElement.appendChild(li);\n",
       "\n",
       "    const fileDataPromise = new Promise((resolve) => {\n",
       "      const reader = new FileReader();\n",
       "      reader.onload = (e) => {\n",
       "        resolve(e.target.result);\n",
       "      };\n",
       "      reader.readAsArrayBuffer(file);\n",
       "    });\n",
       "    // Wait for the data to be ready.\n",
       "    let fileData = yield {\n",
       "      promise: fileDataPromise,\n",
       "      response: {\n",
       "        action: 'continue',\n",
       "      }\n",
       "    };\n",
       "\n",
       "    // Use a chunked sending to avoid message size limits. See b/62115660.\n",
       "    let position = 0;\n",
       "    do {\n",
       "      const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
       "      const chunk = new Uint8Array(fileData, position, length);\n",
       "      position += length;\n",
       "\n",
       "      const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
       "      yield {\n",
       "        response: {\n",
       "          action: 'append',\n",
       "          file: file.name,\n",
       "          data: base64,\n",
       "        },\n",
       "      };\n",
       "\n",
       "      let percentDone = fileData.byteLength === 0 ?\n",
       "          100 :\n",
       "          Math.round((position / fileData.byteLength) * 100);\n",
       "      percent.textContent = `${percentDone}% done`;\n",
       "\n",
       "    } while (position < fileData.byteLength);\n",
       "  }\n",
       "\n",
       "  // All done.\n",
       "  yield {\n",
       "    response: {\n",
       "      action: 'complete',\n",
       "    }\n",
       "  };\n",
       "}\n",
       "\n",
       "scope.google = scope.google || {};\n",
       "scope.google.colab = scope.google.colab || {};\n",
       "scope.google.colab._files = {\n",
       "  _uploadFiles,\n",
       "  _uploadFilesContinue,\n",
       "};\n",
       "})(self);\n",
       "</script> "
      ]
     },
     "metadata": {}
    },
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Saving base.py to base.py\n"
     ]
    }
   ],
   "source": [
    "from google.colab import files\n",
    "\n",
    "upload = files.upload()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "id": "72yVLCty1YOT",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 74
    },
    "outputId": "212a0a5c-67fe-41bc-a162-d47bacaee4ba"
   },
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ],
      "text/html": [
       "\n",
       "     <input type=\"file\" id=\"files-2c47753b-8d1f-4437-aa0c-79643eab47ba\" name=\"files[]\" multiple disabled\n",
       "        style=\"border:none\" />\n",
       "     <output id=\"result-2c47753b-8d1f-4437-aa0c-79643eab47ba\">\n",
       "      Upload widget is only available when the cell has been executed in the\n",
       "      current browser session. Please rerun this cell to enable.\n",
       "      </output>\n",
       "      <script>// Copyright 2017 Google LLC\n",
       "//\n",
       "// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
       "// you may not use this file except in compliance with the License.\n",
       "// You may obtain a copy of the License at\n",
       "//\n",
       "//      http://www.apache.org/licenses/LICENSE-2.0\n",
       "//\n",
       "// Unless required by applicable law or agreed to in writing, software\n",
       "// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
       "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
       "// See the License for the specific language governing permissions and\n",
       "// limitations under the License.\n",
       "\n",
       "/**\n",
       " * @fileoverview Helpers for google.colab Python module.\n",
       " */\n",
       "(function(scope) {\n",
       "function span(text, styleAttributes = {}) {\n",
       "  const element = document.createElement('span');\n",
       "  element.textContent = text;\n",
       "  for (const key of Object.keys(styleAttributes)) {\n",
       "    element.style[key] = styleAttributes[key];\n",
       "  }\n",
       "  return element;\n",
       "}\n",
       "\n",
       "// Max number of bytes which will be uploaded at a time.\n",
       "const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
       "\n",
       "function _uploadFiles(inputId, outputId) {\n",
       "  const steps = uploadFilesStep(inputId, outputId);\n",
       "  const outputElement = document.getElementById(outputId);\n",
       "  // Cache steps on the outputElement to make it available for the next call\n",
       "  // to uploadFilesContinue from Python.\n",
       "  outputElement.steps = steps;\n",
       "\n",
       "  return _uploadFilesContinue(outputId);\n",
       "}\n",
       "\n",
       "// This is roughly an async generator (not supported in the browser yet),\n",
       "// where there are multiple asynchronous steps and the Python side is going\n",
       "// to poll for completion of each step.\n",
       "// This uses a Promise to block the python side on completion of each step,\n",
       "// then passes the result of the previous step as the input to the next step.\n",
       "function _uploadFilesContinue(outputId) {\n",
       "  const outputElement = document.getElementById(outputId);\n",
       "  const steps = outputElement.steps;\n",
       "\n",
       "  const next = steps.next(outputElement.lastPromiseValue);\n",
       "  return Promise.resolve(next.value.promise).then((value) => {\n",
       "    // Cache the last promise value to make it available to the next\n",
       "    // step of the generator.\n",
       "    outputElement.lastPromiseValue = value;\n",
       "    return next.value.response;\n",
       "  });\n",
       "}\n",
       "\n",
       "/**\n",
       " * Generator function which is called between each async step of the upload\n",
       " * process.\n",
       " * @param {string} inputId Element ID of the input file picker element.\n",
       " * @param {string} outputId Element ID of the output display.\n",
       " * @return {!Iterable<!Object>} Iterable of next steps.\n",
       " */\n",
       "function* uploadFilesStep(inputId, outputId) {\n",
       "  const inputElement = document.getElementById(inputId);\n",
       "  inputElement.disabled = false;\n",
       "\n",
       "  const outputElement = document.getElementById(outputId);\n",
       "  outputElement.innerHTML = '';\n",
       "\n",
       "  const pickedPromise = new Promise((resolve) => {\n",
       "    inputElement.addEventListener('change', (e) => {\n",
       "      resolve(e.target.files);\n",
       "    });\n",
       "  });\n",
       "\n",
       "  const cancel = document.createElement('button');\n",
       "  inputElement.parentElement.appendChild(cancel);\n",
       "  cancel.textContent = 'Cancel upload';\n",
       "  const cancelPromise = new Promise((resolve) => {\n",
       "    cancel.onclick = () => {\n",
       "      resolve(null);\n",
       "    };\n",
       "  });\n",
       "\n",
       "  // Wait for the user to pick the files.\n",
       "  const files = yield {\n",
       "    promise: Promise.race([pickedPromise, cancelPromise]),\n",
       "    response: {\n",
       "      action: 'starting',\n",
       "    }\n",
       "  };\n",
       "\n",
       "  cancel.remove();\n",
       "\n",
       "  // Disable the input element since further picks are not allowed.\n",
       "  inputElement.disabled = true;\n",
       "\n",
       "  if (!files) {\n",
       "    return {\n",
       "      response: {\n",
       "        action: 'complete',\n",
       "      }\n",
       "    };\n",
       "  }\n",
       "\n",
       "  for (const file of files) {\n",
       "    const li = document.createElement('li');\n",
       "    li.append(span(file.name, {fontWeight: 'bold'}));\n",
       "    li.append(span(\n",
       "        `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
       "        `last modified: ${\n",
       "            file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
       "                                    'n/a'} - `));\n",
       "    const percent = span('0% done');\n",
       "    li.appendChild(percent);\n",
       "\n",
       "    outputElement.appendChild(li);\n",
       "\n",
       "    const fileDataPromise = new Promise((resolve) => {\n",
       "      const reader = new FileReader();\n",
       "      reader.onload = (e) => {\n",
       "        resolve(e.target.result);\n",
       "      };\n",
       "      reader.readAsArrayBuffer(file);\n",
       "    });\n",
       "    // Wait for the data to be ready.\n",
       "    let fileData = yield {\n",
       "      promise: fileDataPromise,\n",
       "      response: {\n",
       "        action: 'continue',\n",
       "      }\n",
       "    };\n",
       "\n",
       "    // Use a chunked sending to avoid message size limits. See b/62115660.\n",
       "    let position = 0;\n",
       "    do {\n",
       "      const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
       "      const chunk = new Uint8Array(fileData, position, length);\n",
       "      position += length;\n",
       "\n",
       "      const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
       "      yield {\n",
       "        response: {\n",
       "          action: 'append',\n",
       "          file: file.name,\n",
       "          data: base64,\n",
       "        },\n",
       "      };\n",
       "\n",
       "      let percentDone = fileData.byteLength === 0 ?\n",
       "          100 :\n",
       "          Math.round((position / fileData.byteLength) * 100);\n",
       "      percent.textContent = `${percentDone}% done`;\n",
       "\n",
       "    } while (position < fileData.byteLength);\n",
       "  }\n",
       "\n",
       "  // All done.\n",
       "  yield {\n",
       "    response: {\n",
       "      action: 'complete',\n",
       "    }\n",
       "  };\n",
       "}\n",
       "\n",
       "scope.google = scope.google || {};\n",
       "scope.google.colab = scope.google.colab || {};\n",
       "scope.google.colab._files = {\n",
       "  _uploadFiles,\n",
       "  _uploadFilesContinue,\n",
       "};\n",
       "})(self);\n",
       "</script> "
      ]
     },
     "metadata": {}
    },
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Saving mathpaper.pdf to mathpaper.pdf\n"
     ]
    }
   ],
   "source": [
    "from google.colab import files\n",
    "\n",
    "upload = files.upload()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "id": "g-lQnl8d10an"
   },
   "outputs": [],
   "source": [
    "from base import PDFNougatOCR\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "id": "CfvTAxc85QRj"
   },
   "outputs": [],
   "source": [
    "reader = PDFNougatOCR()\n",
    "pdf_path = Path(\"mathpaper.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "id": "FhTLiJz75R-_"
   },
   "outputs": [],
   "source": [
    "docs = reader.load_data(pdf_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "id": "GVzIoEOK5UTa",
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "outputId": "0b664683-f3f0-4282-ae1b-b9973125d832"
   },
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "metadata": {},
     "execution_count": 10
    }
   ],
   "source": [
    "len(docs)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": [],
   "gpuType": "T4"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}